{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#build_medicalgraph\n",
    "import os\n",
    "import json\n",
    "from py2neo import Graph,Node"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1录入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_quantities(i,j,sen:list):\n",
    "    \"\"\"\n",
    "    item[i,j]是药品\n",
    "    \"\"\"\n",
    "    for item in sen:\n",
    "        if item[2] == \"quantities\":\n",
    "            if item[4] == i and item[5] == j:\n",
    "                if item[3] == \"QUA\":\n",
    "                    return item[0]\n",
    "                elif item[6] == \"QUA\":\n",
    "                    return item[1]\n",
    "            elif item[7] == i and item[8] == j:\n",
    "                if item[3] == \"QUA\":\n",
    "                    return item[0]\n",
    "                elif item[6] == \"QUA\":\n",
    "                    return item[1]\n",
    "    return \"N/A\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_idx(i,j,sen:list):\n",
    "    \"\"\"\n",
    "    item[i,j]是USE\n",
    "    \"\"\"\n",
    "    # 构造sen中use的set\n",
    "    set_sen = set()\n",
    "    for item in sen:\n",
    "        if item[3] == \"USE\":\n",
    "            set_sen.add((item[4],item[5]))\n",
    "        if item[6] == \"USE\":\n",
    "            set_sen.add((item[7],item[8]))\n",
    "    \n",
    "    set_sen = list(set_sen)\n",
    "    set_sen = sorted(set_sen,key=lambda t:int(t[0]))  \n",
    "    \n",
    "    #print(set_sen)\n",
    "    for k in range(len(set_sen)):\n",
    "        if set_sen[k][0] == i and set_sen[k][1] == j:\n",
    "            return i\n",
    "    print(\"error!!\")\n",
    "    return \"error\"\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_rel = []\n",
    "sen = []\n",
    "for line in  open(\"data_res/triple.txt\",encoding= \"utf-8\"):\n",
    "    line = line.split()\n",
    "    if len(line) >0:\n",
    "        sen.append(line)\n",
    "    elif len(line) == 0:\n",
    "        data_rel.append(sen)\n",
    "        sen = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "class MedicalGraph:\n",
    "    def __init__(self):\n",
    "        #cur_dir = '/'.join(os.path.abspath(__file__).split('/')[:-1])\n",
    "        #self.data_path = os.path.join(dir_path + 'KG_built/')\n",
    "        self.g = Graph(\n",
    "            host=\"localhost\",  # neo4j 搭载服务器的ip地址，ifconfig可获取到\n",
    "            http_port=7474,  # neo4j 服务器监听的端口号\n",
    "            user=\"neo4j\",  # 数据库user name，如果没有更改过，应该是neo4j\n",
    "            password=\"password\")\n",
    "        \n",
    "    '''读取文件'''\n",
    "    def read_nodes(self):        \n",
    "        # 共4类节点\n",
    "        names = [] # 方名\n",
    "        drugs = [] # 草药\n",
    "        uses = [] # 用法\n",
    "        symptoms = [] # 症状\n",
    "        \n",
    "        # 构建节点实体关系\n",
    "        rels_name_dru = [] #　方名、草药之间的关系（关系的属性是药量）\n",
    "        rels_name_anotherName = [] # 方名、又方之间的关系\n",
    "        rels_name_use = [] # 方名、用法之间的关系\n",
    "        rels_name_sym = [] # 方名、症状之间的关系\n",
    "        rels_use_sym = [] # 用法、症状之间的关系\n",
    "        \n",
    "        for line in open('data_res/DRU_CORPUS.txt',encoding=\"utf-8\"):\n",
    "            line = line.strip()\n",
    "            drugs.append(line)\n",
    "            \n",
    "        for line in open('data_res/NAM_CORPUS.txt',encoding=\"utf-8\"):\n",
    "            line = line.strip()\n",
    "            names.append(line)\n",
    "        \n",
    "        for line in open('data_res/SYM_CORPUS.txt',encoding=\"utf-8\"):\n",
    "            line = line.strip()\n",
    "            symptoms.append(line)\n",
    "            \n",
    "        for line in open('data_res/USE_CORPUS.txt',encoding=\"utf-8\"):\n",
    "            line = line.strip()\n",
    "            uses.append(line)\n",
    "        \n",
    "        data_rel = []\n",
    "        sen = []\n",
    "        for line in  open(\"data_res/triple.txt\",encoding= \"utf-8\"):\n",
    "            line = line.split()\n",
    "            if len(line) >0:\n",
    "                sen.append(line)\n",
    "            elif len(line) == 0:\n",
    "                data_rel.append(sen)\n",
    "                sen = []\n",
    "        # 构建节点实体关系\n",
    "        rels_name_dru = [] #　方名、草药之间的关系（关系的属性是药量）\n",
    "        rels_name_anotherName = [] # 方名、又方之间的关系\n",
    "        rels_name_use = [] # 方名、用法之间的关系\n",
    "        rels_name_sym = [] # 方名、症状之间的关系\n",
    "        rels_use_sym = [] # 用法、症状之间的关系\n",
    "\n",
    "\n",
    "        # 文件中存有：【NAM USE uses】【NAM SYM cure】【USE SYM match】【NAM DRU contain】【DRU QUAquantities】\n",
    "        for sen in data_rel:\n",
    "            for item in sen:\n",
    "                #  【NAM DRU contain】【quantities】\n",
    "                if item[2] == \"contain\":\n",
    "                    if item[3] == \"NAM\" and item[6] == \"DRU\":\n",
    "                        q = get_quantities(item[7],item[8],sen)###\n",
    "                        rels_name_dru.append([item[0],item[1],q])###\n",
    "                    else:\n",
    "                        print(\"error:\",item)\n",
    "                #  【NAM USE uses】\n",
    "                elif item[2] == \"uses\":\n",
    "                    if item[3] == \"NAM\" and item[6] == \"USE\":\n",
    "                        idx = get_idx(item[7],item[8],sen)###\n",
    "                        rels_name_use.append([item[0],item[1],idx])###\n",
    "                    else:\n",
    "                        print(\"error:\",item)\n",
    "                #  【NAM SYM cure】\n",
    "                elif item[2] == \"cure\":\n",
    "                    if item[3] == \"NAM\" and item[6] == \"SYM\":\n",
    "                        rels_name_sym.append([item[0],item[1]])\n",
    "                    else:\n",
    "                        print(\"error:\",item)\n",
    "                # 【USE SYM match】\n",
    "                elif item[2] == \"match\":\n",
    "                    if item[3] == \"USE\" and item[6] == \"SYM\":\n",
    "                        rels_use_sym.append([item[0],item[1]])\n",
    "                    else: \n",
    "                        print(\"error:\",item)\n",
    "                # 【NAM ANORTHERNAME unknown】\n",
    "                elif item[3] == \"NAM\" and item[6] == \"NAM\":\n",
    "                    if item[0][:2] == \"又方\" or item[1][:2] == \"又方\":\n",
    "                        rels_name_anotherName.append([item[0],item[1]])\n",
    "                               \n",
    "        return set(names), set(drugs), set(uses), set(symptoms), rels_name_dru, rels_name_anotherName, rels_name_use, rels_name_sym, rels_use_sym\n",
    "        \n",
    "    \n",
    "    '''建立节点'''\n",
    "    def create_node(self, label, nodes):\n",
    "        count = 0\n",
    "        for node_name in nodes:\n",
    "            node = Node(label, name=node_name)\n",
    "            self.g.create(node)\n",
    "            count += 1\n",
    "            #print(count, len(nodes))\n",
    "        return\n",
    "\n",
    "    '''创建知识图谱实体节点类型schema'''\n",
    "    def create_graphnodes(self):\n",
    "        Names, Drugs, Uses, Symptoms, rels_name_dru, rels_name_anotherName, rels_name_use, rels_name_sym, rels_use_sym  = self.read_nodes()\n",
    "        self.create_node('Name', Names)\n",
    "        self.create_node('Drug', Drugs)\n",
    "        self.create_node('Use', Uses)\n",
    "        self.create_node('Symptom', Symptoms)\n",
    "        return\n",
    "\n",
    "\n",
    "    '''创建实体关系边'''\n",
    "    def create_graphrels(self):\n",
    "        Names, Drugs, Uses, Symptoms, rels_name_dru, rels_name_anotherName, rels_name_use, rels_name_sym, rels_use_sym  = self.read_nodes()\n",
    "\n",
    "        self.create_relationship_withquantities('Name', 'Drug', rels_name_dru, '包含')\n",
    "        self.create_relationship_with_id('Name', 'Use', rels_name_use , '使用方法')\n",
    "        self.create_relationship('Name', 'Name', rels_name_anotherName, '又方')\n",
    "        self.create_relationship('Name', 'Symptom', rels_name_sym,'治疗')\n",
    "        self.create_relationship('Use', 'Symptom', rels_use_sym,'对应症状')\n",
    "        \"\"\"\n",
    "        shiti = [\"Name\",\"Drug\",\"Use\",\"Symptom\"]\n",
    "        cypher1 = [\"match(a)-[r]-(b:\"+ i+ \") where b.name='' delete r\" for i in shiti]\n",
    "        cypher2 = [\"match (n:\"+ i +\") where n.name='' delete n\" for i in shiti]\n",
    "        for i in cypher1:\n",
    "            self.g.run(i)\n",
    "\n",
    "        for i in cypher2:\n",
    "            self.g.run(i)\n",
    "        \"\"\"\n",
    "    '''创建实体关联边'''\n",
    "    def create_relationship(self, start_node, end_node, edges,rel_type):\n",
    "        count = 0\n",
    "        # 去重处理\n",
    "        set_edges = []\n",
    "        for edge in edges:\n",
    "            set_edges.append('###'.join(edge))\n",
    "        all = len(set(set_edges))\n",
    "        for edge in set(set_edges):\n",
    "            edge = edge.split('###')\n",
    "            p = edge[0]\n",
    "            q = edge[1]\n",
    "            \n",
    "            query = \"match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s]->(q)\" % (\n",
    "                start_node, end_node, p, q, rel_type)\n",
    "            try:\n",
    "                self.g.run(query)\n",
    "                count += 1\n",
    "                #print(rel_type, count, all)\n",
    "            except Exception as e:\n",
    "                print(e)\n",
    "        return\n",
    "\n",
    "    '''创建实体关联边'''\n",
    "    def create_relationship_withquantities(self, start_node, end_node, edges, rel_type):\n",
    "        \"\"\"\n",
    "        edges = [a,b,c]\n",
    "        a b 是实体\n",
    "        c是关系的名字\n",
    "        \"\"\"\n",
    "        count = 0\n",
    "        # 去重处理\n",
    "        set_edges = []\n",
    "        for edge in edges:\n",
    "            set_edges.append('###'.join(edge))# entity1###entity2\n",
    "        all = len(set(set_edges))\n",
    "        \n",
    "        for edge in set(set_edges):\n",
    "            edge = edge.split('###')\n",
    "            p = edge[0]\n",
    "            q = edge[1]\n",
    "            quantiti = edge[2]\n",
    "            query = \"match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{分量:'%s'}]->(q)\" % (\n",
    "                start_node, end_node, p, q, rel_type, quantiti)\n",
    "            try:\n",
    "                self.g.run(query)\n",
    "                count += 1\n",
    "                #print(rel_type, count, all)\n",
    "            except Exception as e:\n",
    "                print(e)\n",
    "        return\n",
    "\n",
    "    '''创建实体关联边'''\n",
    "    def create_relationship_with_id(self, start_node, end_node, edges, rel_type):\n",
    "        \"\"\"\n",
    "        edges = [a,b,c]\n",
    "        a b 是实体\n",
    "        c是关系的顺序编号\n",
    "        \"\"\"\n",
    "        count = 0\n",
    "        # 去重处理\n",
    "        set_edges = []\n",
    "        for edge in edges:\n",
    "            set_edges.append('###'.join(edge))# entity1###entity2\n",
    "        all = len(set(set_edges))\n",
    "        \n",
    "        for edge in set(set_edges):\n",
    "            edge = edge.split('###')\n",
    "            p = edge[0]\n",
    "            q = edge[1]\n",
    "            idx = edge[2]\n",
    "            query = \"match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{起始索引:'%s',最大索引:149}]->(q)\" % (\n",
    "                start_node, end_node, p, q, rel_type, idx)\n",
    "            try:\n",
    "                self.g.run(query)\n",
    "                count += 1\n",
    "                #print(rel_type, count, all)\n",
    "            except Exception as e:\n",
    "                print(e)\n",
    "        return\n",
    "    \n",
    "    def run(self,str_run):\n",
    "        self.g.run(str_run)\n",
    "        return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    handler = MedicalGraph()\n",
    "    handler.create_graphnodes()\n",
    "    handler.create_graphrels()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
